# Clear environment
rm(list = ls())

# Set working directory
setwd("/Users/georgemelios/Downloads/PSRM_Replication 3")

# Load required libraries
library(cjoint)
library(haven)
library(dplyr)
library(openxlsx)
library(survival)

# Load data and workbook
mydata <- read_dta("DATA/final_cjoint.dta")
mydata <- purrr::modify_if(mydata, is.labelled, as_factor)
wb <- loadWorkbook("TAB/tables.xlsx")

###### Table A11 ######

selected_columns <- c("cj_matchpid", "cj_race", "cj_diet", "cj_beauty", "cj_height", "cj_edu", "cj_affect", "cj_ideology")
data_a11 <- mydata[complete.cases(mydata[selected_columns]), ]

clogit_model <- clogit(choice ~ cj_matchpid + cj_affect + cj_ideology + cj_race + cj_edu + cj_diet + cj_beauty + cj_height + strata(caseid), data = data_a11)
clogit_sum <- summary(clogit_model)

# Extract coefficients
coefs <- clogit_sum$coefficients[, 1]
se <- clogit_sum$coefficients[, 3]

z_val <- qnorm(0.975) # z value for 95% CI
lower_ci <- coefs - z_val * se
upper_ci <- coefs + z_val * se

result_df <- data.frame(
  Coefficient = coefs,
  StandardError = se,
  LowerCI95 = lower_ci,
  UpperCI95 = upper_ci
)

# Add attribute and baseline names

Attribute <- c("In-party", "Tolerant", "Progressive", "White", "With degree", "Vegetarian", "Attractive", "Tall")
result_df <- data.frame(Attribute, result_df)
Baseline <- c("Out-party", "Intolerant", "Traditional", "Black", "Without degree", "Non-vegetarian", "Unattractive", "Short")

result_df <- data.frame(result_df[, 1:1, drop = FALSE], 
                         Baseline = Baseline, 
                        result_df[, (1+1):ncol(result_df), drop = FALSE])

result_df <- result_df %>% rename(Est. = Coefficient, SE = StandardError, LCI = LowerCI95, UCI = UpperCI95)

# Number of observations and clusters
num_observations <- nrow(data_a11)
num_clusters <- length(unique(data_a11$caseid))

result_df <- rbind(
  result_df,
  data.frame(Attribute = "Number of Observations", Baseline = num_observations, Est. = "", SE = "", LCI = "", UCI = ""),
  data.frame(Attribute = "Number of Clusters", Baseline = num_clusters, Est. = "", SE = "", LCI = "", UCI = "")
)

# Export to Excel
addWorksheet(wb, "Table A11")
writeData(wb, "Table A11", result_df)

###### Table A12 ######

selected_columns <- c("cj_matchid", "cj_matchrace", "cj_matchdiet", "cj_matchbeau", "cj_matchhei", "cj_matched", "cj_matchaff_rob", "cj_matchpid")
match_data <- mydata[complete.cases(mydata[selected_columns]), ]

# Run regression
tolerance_rob <- amce(choice ~ cj_matchpid + cj_matchaff_rob + cj_matchid + cj_matchrace +
                       cj_matched + cj_matchdiet + cj_matchbeau + cj_matchhei,
                     data = match_data,  subset = NULL,
                     respondent.id = "caseid", cluster = TRUE, na.ignore = TRUE, weights = NULL, baselines = NULL)

# Transform the results into a data frame for table
tolerance_rob_df <- do.call(rbind, lapply(tolerance_rob$estimates, 
                                      function(x) data.frame(Est. = x[1, 1], SE = x[2, 1])))

# Order the attributes
order_vec <- c("cjmatchpid", "cjmatchaffrob", "cjmatchid", "cjmatchrace",
               "cjmatched", "cjmatchdiet", "cjmatchbeau", "cjmatchhei")
tolerance_rob_df <- tolerance_rob_df[match(order_vec, rownames(tolerance_rob_df)), ]

# Add attribute names
Attribute <- c("Partisanship Match", "Tolerance Match", "Ideology Match", "Race Match", "Education Match", "Diet Match", "Beauty Match", "Height Match")
tolerance_rob_df <- data.frame(Attribute, tolerance_rob_df)

# Calculate the 95% confidence intervals
tolerance_rob_df$LCI <- tolerance_rob_df$Est. - 1.96 * tolerance_rob_df$SE
tolerance_rob_df$UCI <- tolerance_rob_df$Est. + 1.96 * tolerance_rob_df$SE

# Round
tolerance_rob_df[, c("SE", "Est.", "LCI", "UCI")] <- round(tolerance_rob_df[, c("SE", "Est.", "LCI", "UCI")], 3)

# Number of observations and clusters
num_observations <- nrow(match_data)
num_clusters <- length(unique(match_data$caseid))

tolerance_rob_df <- rbind(
  tolerance_rob_df,
  data.frame(Attribute = "Number of Observations", Est. = num_observations, SE = "", LCI = "", UCI = ""),
  data.frame(Attribute = "Number of Clusters", Est. = num_clusters, SE = "", LCI = "", UCI = "")
)

# Export to Excel
addWorksheet(wb, "Table A12")
writeData(wb, "Table A12", tolerance_rob_df)

###### Table A13 ######

selected_columns <- c("cj_matchpid", "cj_affect", "cj_ideology", "cj_race", "cj_edu", "cj_diet", "cj_beauty", "cj_height", "round")
round_df <- mydata[complete.cases(mydata[selected_columns]), ]

# Run regression
round_rob <- amce(choice ~ cj_matchpid:round + cj_affect:round + cj_ideology:round 
                  + cj_race:round + cj_edu:round + cj_diet:round 
                  + cj_beauty:round + cj_height:round,
                  data = round_df,  subset = NULL,
                  respondent.id = "caseid", cluster = TRUE, na.ignore = TRUE, weights = NULL, baselines = NULL)

round_rob_df <- summary.amce(round_rob)$acie[, c("Attribute", "Level", "Estimate", "Std. Err")]

# Add attribute and baseline names

Attribute <- c("In-party", "In-party", "Tolerant", "Tolerant", 
               "Progressive", "Progressive", "White", "White",
               "Degree", "Degree", "Vegetarian", "Vegetarian",
               "Attractive", "Attractive", "Tall", "Tall")

round_rob_df$Attribute <- Attribute

Baseline <- c("Out-party", "Out-party", "Intolerant", "Intolerant",
              "Traditional", "Traditional", "Black", "Black",
              "No degree", "No degree", "Non-vegetarian", "Non-vegetarian",
              "Unattractive", "Unattractive", "Short", "Short")

round_rob_df$Level <- Baseline

round_rob_df <- round_rob_df %>%
  rename(Baseline = Level,
         Est. = Estimate,
         SE = "Std. Err")

# Calculate the 95% confidence intervals
round_rob_df$LCI <- round_rob_df$Est. - 1.96 * round_rob_df$SE
round_rob_df$UCI <- round_rob_df$Est. + 1.96 * round_rob_df$SE

# Round
round_rob_df[, c("SE", "Est.", "LCI", "UCI")] <- round(round_rob_df[, c("SE", "Est.", "LCI", "UCI")], 3)

# Number of observations and clusters
num_observations <- nrow(round_df)
num_clusters <- length(unique(round_df$caseid))

round_rob_df <- rbind(
  round_rob_df,
  data.frame(Attribute = "Number of Observations", Baseline = num_observations, Est. = "", SE = "", LCI = "", UCI = ""),
  data.frame(Attribute = "Number of Clusters", Baseline = num_clusters, Est. = "", SE = "", LCI = "", UCI = "")
)

main_effects <- round_rob_df[1:10, ]
int_effects <- round_rob_df[11:26, ]
# Export to Excel
addWorksheet(wb, "Table A13")
writeData(wb, "Table A13", int_effects)

###### Table A14 ######

selected_columns <- c("cj_matchpid", "cj_affect", "cj_ideology", "cj_race", "cj_edu", "cj_diet", "cj_beauty", "cj_height")
duration_df <- mydata[complete.cases(mydata[selected_columns]), ]

# Run regression
duration_rob <- lm(choice ~  cj_matchpid*durationinseconds + cj_affect*durationinseconds + cj_ideology*durationinseconds + 
                     cj_race*durationinseconds + cj_edu*durationinseconds + cj_diet*durationinseconds 
                   + cj_beauty*durationinseconds + cj_height*durationinseconds,
                   data = duration_df)

duration_rob_df <- data.frame(
  Est. = coef(duration_rob),
  SE = diag(sqrt(vcovHC(duration_rob, type = "HC1", cluster = "group", group = duration_df$caseid)))
)
duration_rob_df <- duration_rob_df[-(1),]

# Keep interactions and add attribute and baseline names
duration_rob_df <- duration_rob_df[(nrow(duration_rob_df) - 7):nrow(duration_rob_df), ]

Attribute <- c("In-party","Tolerant", 
               "Progressive", "White", 
               "With degree", "Vegetarian", 
               "Attractive", "Tall")

duration_rob_df <- data.frame(Attribute, duration_rob_df)

Baseline <- c("Out-party", "Intolerant", "Traditional", "Black", "Without degree", 
              "Non-vegetarian", "Unattractive", "Short")

duration_rob_df <- data.frame(duration_rob_df[, 1:1, drop = FALSE], 
                              Baseline = Baseline, 
                              duration_rob_df[, (1+1):ncol(duration_rob_df), drop = FALSE])

# Calculate the 95% confidence intervals
duration_rob_df$LCI <- duration_rob_df$Est. - 1.96 * duration_rob_df$SE
duration_rob_df$UCI <- duration_rob_df$Est. + 1.96 * duration_rob_df$SE

# Round
duration_rob_df <- duration_rob_df %>%
  mutate(across(c(Est., LCI, UCI, SE), as.numeric))

duration_rob_df[, c("SE", "Est.", "LCI", "UCI")] <- round(duration_rob_df[, c("SE", "Est.", "LCI", "UCI")], 3)

# Number of observations and clusters
num_observations <- nrow(duration_df)
num_clusters <- length(unique(duration_df$caseid))

duration_rob_df <- rbind(
  duration_rob_df,
  data.frame(Attribute = "Number of Observations", Baseline = num_observations, Est. = "", SE = "", LCI = "", UCI = ""),
  data.frame(Attribute = "Number of Clusters", Baseline = num_clusters, Est. = "", SE = "", LCI = "", UCI = "")
)

# Export to Excel
addWorksheet(wb, "Table A14")
writeData(wb, "Table A14", duration_rob_df)

###### Table A15 ######

#1. Relationship status

selected_columns <- c("cj_matchpid", "cj_affect", "cj_ideology", "cj_race", "cj_edu", "cj_diet", "cj_beauty", "cj_height", "rel_short")
rel_df <- mydata[complete.cases(mydata[selected_columns]), ]

rel_df$rel_short <- ifelse(rel_df$rel_short == 1, 0, 
                           ifelse(rel_df$rel_short == 2, 1, rel_df$rel_short))
# Run regression
rel_rob <- amce(choice ~ cj_matchpid:rel_short + cj_affect:rel_short + cj_ideology:rel_short 
                + cj_race:rel_short + cj_edu:rel_short + cj_diet:rel_short 
                + cj_beauty:rel_short + cj_height:rel_short,
                data = rel_df,  subset = NULL,
                respondent.id = "caseid", cluster = TRUE, na.ignore = TRUE, weights = NULL, baselines = NULL)

rel_rob_df <- summary.amce(rel_rob)$acie[, c("Attribute", "Level", "Estimate", "Std. Err")]

# Add attribute and baseline names

Attribute <- c("In-party", "Tolerant", 
               "Progressive", "White", 
               "Degree", "Vegetarian", 
               "Attractive", "Tall")

rel_rob_df$Attribute <- Attribute

Baseline <- c("Out-party", "Intolerant", 
              "Traditional", "Black", 
              "No degree", "Non-vegetarian", 
              "Unattractive", "Short")

rel_rob_df$Level <- Baseline

rel_rob_df <- rel_rob_df %>%
  rename(Baseline = Level,
         Est. = Estimate,
         SE = "Std. Err")

# Calculate the 95% confidence intervals
rel_rob_df$LCI <- rel_rob_df$Est. - 1.96 * rel_rob_df$SE
rel_rob_df$UCI <- rel_rob_df$Est. + 1.96 * rel_rob_df$SE

# Round
rel_rob_df[, c("SE", "Est.", "LCI", "UCI")] <- round(rel_rob_df[, c("SE", "Est.", "LCI", "UCI")], 3)

# Number of observations and clusters
num_observations <- nrow(rel_df)
num_clusters <- length(unique(rel_df$caseid))

rel_rob_df <- rbind(
  rel_rob_df,
  data.frame(Attribute = "Number of Observations", Baseline = num_observations, Est. = "", SE = "", LCI = "", UCI = ""),
  data.frame(Attribute = "Number of Clusters", Baseline = num_clusters, Est. = "", SE = "", LCI = "", UCI = "")
)
rel_rob_df
# Panel name
rel_rob_df <- rel_rob_df %>%
  add_row(Attribute = "Panel A: Single (ref: in a relationship)", .before = 1) 

#2. Age

selected_columns <- c("cj_matchpid", "cj_affect", "cj_ideology", 
                      "cj_race", "cj_edu", "cj_diet", 
                      "cj_beauty", "cj_height", "age")
age_df <- mydata[complete.cases(mydata[selected_columns]), ]

# Run regression
age_rob <- lm(choice ~ cj_matchpid*age + cj_affect*age + cj_ideology*age + 
                cj_race*age + cj_edu*age + cj_diet*age + cj_beauty*age + cj_height*age,
              data = age_df)

age_rob_df <- data.frame(
  Est. = coef(age_rob),
  SE = diag(sqrt(vcovHC(age_rob, type = "HC1", cluster = "group", group = age_df$caseid)))
)

age_rob_df <- age_rob_df[(nrow(age_rob_df) - 7):nrow(age_rob_df), ]

# Add attribute and baseline names

Attribute <- c("In-party", "Tolerant", 
               "Progressive", "White", 
               "With degree", "Vegetarian", 
               "Attractive", "Tall")

age_rob_df <- data.frame(Attribute, age_rob_df)

Baseline <- c("Out-party", "Intolerant", "Traditional", "Black", "Without degree", 
              "Non-vegetarian", "Unattractive", "Short")

age_rob_df <- data.frame(age_rob_df[, 1:1, drop = FALSE], 
                     Baseline = Baseline, 
                     age_rob_df[, (1+1):ncol(age_rob_df), drop = FALSE])

# Calculate the 95% confidence intervals
age_rob_df$LCI <- age_rob_df$Est. - 1.96 * age_rob_df$SE
age_rob_df$UCI <- age_rob_df$Est. + 1.96 * age_rob_df$SE

# Round
age_rob_df[, c("SE", "Est.", "LCI", "UCI")] <- round(age_rob_df[, c("SE", "Est.", "LCI", "UCI")], 3)

# Number of observations and clusters
num_observations <- nrow(age_df)
num_clusters <- length(unique(age_df$caseid))

age_rob_df <- rbind(
  age_rob_df,
  data.frame(Attribute = "Number of Observations", Baseline = num_observations, Est. = "", SE = "", LCI = "", UCI = ""),
  data.frame(Attribute = "Number of Clusters", Baseline = num_clusters, Est. = "", SE = "", LCI = "", UCI = "")
)

# Panel name
age_rob_df <- age_rob_df %>%
  add_row(Attribute = "Panel B: Age", .before = 1) 

#3. Education

selected_columns <- c("cj_matchpid", "cj_affect", "cj_ideology", 
                      "cj_race", "cj_edu", "cj_diet", 
                      "cj_beauty", "cj_height", "degree")
edu_df <- mydata[complete.cases(mydata[selected_columns]), ]

# Run regression
edu_rob <- amce(choice ~ cj_matchpid:degree + cj_affect:degree + cj_ideology:degree 
                + cj_race:degree + cj_edu:degree + cj_diet:degree 
                + cj_beauty:degree + cj_height:degree,
                data = edu_df,  subset = NULL,
                respondent.id = "caseid", cluster = TRUE, na.ignore = TRUE, weights = NULL, baselines = NULL)

edu_rob_df <- summary.amce(edu_rob)$acie[, c("Attribute", "Level", "Estimate", "Std. Err")]

# Add attribute and baseline names

Attribute <- c("In-party", "Tolerant", 
               "Progressive", "White", 
               "Degree", "Vegetarian", 
               "Attractive", "Tall")

edu_rob_df$Attribute <- Attribute

Baseline <- c("Out-party", "Intolerant", 
              "Traditional", "Black", 
              "No degree", "Non-vegetarian", 
              "Unattractive", "Short")

edu_rob_df$Level <- Baseline

edu_rob_df <- edu_rob_df %>%
  rename(Baseline = Level,
         Est. = Estimate,
         SE = "Std. Err")

# Calculate the 95% confidence intervals
edu_rob_df$LCI <- edu_rob_df$Est. - 1.96 * edu_rob_df$SE
edu_rob_df$UCI <- edu_rob_df$Est. + 1.96 * edu_rob_df$SE

# Round
edu_rob_df[, c("SE", "Est.", "LCI", "UCI")] <- round(edu_rob_df[, c("SE", "Est.", "LCI", "UCI")], 3)

# Number of observations and clusters
num_observations <- nrow(edu_df)
num_clusters <- length(unique(edu_df$caseid))

edu_rob_df <- rbind(
  edu_rob_df,
  data.frame(Attribute = "Number of Observations", Baseline = num_observations, Est. = "", SE = "", LCI = "", UCI = ""),
  data.frame(Attribute = "Number of Clusters", Baseline = num_clusters, Est. = "", SE = "", LCI = "", UCI = "")
)

# Panel name
edu_rob_df <- edu_rob_df %>%
  add_row(Attribute = "Panel C: With a degree (ref: no degree)", .before = 1) 

#4. Combine 
combined_df_dem <- rbind(rel_rob_df, age_rob_df, edu_rob_df)

# Export to Excel
addWorksheet(wb, "Table A15")
writeData(wb, "Table A15", combined_df_dem)

###### Table A16 ######

#1. Independents

selected_columns <- c("cj_party", "cj_affect", "cj_ideology", 
                      "cj_race", "cj_edu", "cj_diet", 
                      "cj_beauty", "cj_height", "Independent")
ind_df <- mydata[complete.cases(mydata[selected_columns]), ]

# Run regression
ind_rob <- amce(choice ~ cj_party:Independent + cj_affect:Independent + cj_ideology:Independent 
                + cj_race:Independent + cj_edu:Independent + cj_diet:Independent 
                + cj_beauty:Independent + cj_height:Independent,
                data = ind_df,  subset = NULL,
                respondent.id = "caseid", cluster = TRUE, 
                na.ignore = TRUE, weights = NULL, baselines = NULL)

ind_rob_df <- summary.amce(ind_rob)$acie[, c("Attribute", "Level", "Estimate", "Std. Err")]

# Add attribute and baseline names

Attribute <- c("Labour", "Tolerant", 
               "Progressive", "White", 
               "With degree", "Vegetarian", 
               "Attractive", "Tall")

ind_rob_df$Attribute <- Attribute

Baseline <- c("Tory", "Intolerant", 
              "Traditional", "Black", 
              "Without degree", "Non-vegetarian", 
              "Unattractive", "Short")

ind_rob_df$Level <- Baseline

ind_rob_df <- ind_rob_df %>%
  rename(Baseline = Level,
         Est. = Estimate,
         SE = "Std. Err")

# Calculate the 95% confidence intervals
ind_rob_df$LCI <- ind_rob_df$Est. - 1.96 * ind_rob_df$SE
ind_rob_df$UCI <- ind_rob_df$Est. + 1.96 * ind_rob_df$SE

# Round
ind_rob_df[, c("SE", "Est.", "LCI", "UCI")] <- round(ind_rob_df[, c("SE", "Est.", "LCI", "UCI")], 3)

# Number of observations and clusters
num_observations <- nrow(ind_df)
num_clusters <- length(unique(ind_df$caseid))

ind_rob_df <- rbind(
  ind_rob_df,
  data.frame(Attribute = "Number of Observations", Baseline = num_observations, Est. = "", SE = "", LCI = "", UCI = ""),
  data.frame(Attribute = "Number of Clusters", Baseline = num_clusters, Est. = "", SE = "", LCI = "", UCI = "")
)

#2. Extreme partisans

selected_columns <- c("cj_matchpid", "cj_affect", "cj_ideology", 
                      "cj_race", "cj_edu", "cj_diet", 
                      "cj_beauty", "cj_height", "extreme_partisans")
ext_df <- mydata[complete.cases(mydata[selected_columns]), ]

# Run regression
ext_rob <- amce(choice ~ cj_matchpid:extreme_partisans + cj_affect:extreme_partisans + cj_ideology:extreme_partisans 
                + cj_race:extreme_partisans + cj_edu:extreme_partisans + cj_diet:extreme_partisans 
                + cj_beauty:extreme_partisans + cj_height:extreme_partisans,
                data = ext_df,  subset = NULL,
                respondent.id = "caseid", cluster = TRUE, na.ignore = TRUE, weights = NULL, baselines = NULL)

ext_rob_df <- summary.amce(ext_rob)$acie[, c("Attribute", "Level", "Estimate", "Std. Err")]

# Add attribute and baseline names

Attribute <- c("In-party", "Tolerant", "Progressive", "White", "With degree", "Vegetarian", "Attractive", "Tall")
ext_rob_df$Attribute <- Attribute

Baseline <- c("Out-party", "Intolerant", "Traditional", "Black", "Without degree", "Non-vegetarian", "Unattractive", "Short")
ext_rob_df$Level <- Baseline

ext_rob_df <- ext_rob_df %>%
  rename(Baseline = Level,
         Est. = Estimate,
         SE = "Std. Err")

# Calculate the 95% confidence intervals
ext_rob_df$LCI <- ext_rob_df$Est. - 1.96 * ext_rob_df$SE
ext_rob_df$UCI <- ext_rob_df$Est. + 1.96 * ext_rob_df$SE

# Round
ext_rob_df[, c("SE", "Est.", "LCI", "UCI")] <- round(ext_rob_df[, c("SE", "Est.", "LCI", "UCI")], 3)

# Number of observations and clusters
num_observations <- nrow(ext_df)
num_clusters <- length(unique(ext_df$caseid))

ext_rob_df <- rbind(
  ext_rob_df,
  data.frame(Attribute = "Number of Observations", Baseline = num_observations, Est. = "", SE = "", LCI = "", UCI = ""),
  data.frame(Attribute = "Number of Clusters", Baseline = num_clusters, Est. = "", SE = "", LCI = "", UCI = "")
)

#3. Combine 
combined_df_party <- rbind(ind_rob_df, ext_rob_df)

# Export to Excel
addWorksheet(wb, "Table A16")
writeData(wb, "Table A16", combined_df_party)

###### Table A17 ######

# Load the data
photo_data <- read_dta("DATA/photo_survey_F.dta")

# Filter out rows with NA 
rated_photos <- photo_data[!is.na(photo_data$final_rating), ]

# Generate all possible pairwise combinations of 'group'
combinations <- expand.grid(Photo_A = unique(photo_data$group),
                            Photo_B = unique(photo_data$group))

# Filter out combinations of the same group and where Photo_A > Photo_B
combinations <- combinations[combinations$Photo_A < combinations$Photo_B, ]

# Count respondents who rated both photos for each combination
combinations$N <- map2_int(combinations$Photo_A, combinations$Photo_B, 
                           ~sum(rated_photos$id[rated_photos$group == .x] %in% rated_photos$id[rated_photos$group == .y]))

# Calculate DIF (A–B), S.D., % NEG, and 99% CI
combinations <- combinations %>%
  rowwise() %>%
  mutate(
    DIF = mean(rated_photos$final_rating[rated_photos$group == Photo_A & rated_photos$id %in% rated_photos$id[rated_photos$group == Photo_B]]) - 
      mean(rated_photos$final_rating[rated_photos$group == Photo_B & rated_photos$id %in% rated_photos$id[rated_photos$group == Photo_A]]),
    SD = sd(rated_photos$final_rating[rated_photos$group == Photo_A & rated_photos$id %in% rated_photos$id[rated_photos$group == Photo_B]] -
              rated_photos$final_rating[rated_photos$group == Photo_B & rated_photos$id %in% rated_photos$id[rated_photos$group == Photo_A]]),
    NEG = sum(rated_photos$final_rating[rated_photos$group == Photo_A & rated_photos$id %in% rated_photos$id[rated_photos$group == Photo_B]] > 
                rated_photos$final_rating[rated_photos$group == Photo_B & rated_photos$id %in% rated_photos$id[rated_photos$group == Photo_A]]) / N * 100,
    CI = qnorm(0.995)*SD/sqrt(N)
  )

# Create a data frame of the desired pairs
desired_pairs <- data.frame(
  Photo_B = c(53, 46, 41, 54, 29, 44, 52, 55, 58, 47, 49, 59, 56, 45, 20, 43),
  Photo_A = c(27, 25, 19, 21, 26, 24, 51, 50, 12, 28, 15, 48, 11, 23, 17, 42)
)

# Subset the 'combinations' data frame
subset_combinations <- combinations %>%
  semi_join(desired_pairs, by = c("Photo_A", "Photo_B"))

# Create sample table for appendix
selected_rows <- c(1, 2, 3, 12, 13, 8)
sample_data <- subset_combinations[selected_rows, ]

# Add "combination" column
sample_data$combination <- 1:nrow(sample_data)

# Rename NEG to %POS and select specific columns
sample_data <- sample_data[, c("combination", "N", "DIF", "SD", "NEG")]
colnames(sample_data)[5] <- "%POS"

sample_data$DIF <- round(sample_data$DIF, 2)
sample_data$`%POS` <- round(sample_data$`%POS`, 0)

addWorksheet(wb, "Table A17")
writeData(wb, "Table A17", sample_data)

###### Save workbook ######
saveWorkbook(wb, "TAB/tables.xlsx", overwrite = TRUE)









